* Reset settings and initialize log file
* NOTE(review): launch is not a built-in Stata command; presumably it is a
* project-defined program that uses the path() argument to name the log for
* this build step -- confirm against its definition.
launch, path("build/sass_builder")

*-------------------------------------------------------------------------------
* Price and Wasserman (2024), "The Summer Drop in Female Employment"
*
* Description: Prepare data from the Schools and Staffing Survey.
*-------------------------------------------------------------------------------


* Read the raw file of individual public-school teachers into memory
use "$basepath/data/raw/sass/tchpub99.dta", clear

* Give the teacher and school control numbers descriptive names, then check
* that teacher_id uniquely identifies observations (gisid is the gtools
* counterpart of isid)
rename (cntlnum schcntl) (teacher_id school_id)
gisid teacher_id

* Summer earnings items: each participation item is followed by its amount item
rename (T0341 T0342 T0343 T0344 T0345 T0346) ///
	(summer_teach summer_teach_amt ///
	summer_nonteach_sch summer_nonteach_sch_amt ///
	summer_nonschool summer_nonschool_amt)

* School-year earnings items: base salary, additional school pay, bonus pay,
* and outside pay (with the type of outside activity)
rename (T0347 T0348 T0349 T0350 T0351 T0352 T0353 T0354) ///
	(reg_salary reg_sch_oth reg_sch_oth_amt ///
	reg_sch_bonus reg_sch_bonus_amt ///
	reg_other reg_other_amt reg_other_type)

* Put the participation flags on a 0/1 scale by recoding the value 2 to 0
foreach flag of varlist summer_teach summer_nonteach_sch summer_nonschool ///
	reg_sch_oth reg_sch_bonus reg_other {
	replace `flag' = 0 if `flag' == 2
}

* One shared value label for the three binned summer amounts
label define amt_lbl ///
	1 "<$1,000" ///
	2 "$1,000-$1,999" ///
	3 "$2,000-$3,999" ///
	4 "$4,000+", replace

* Turn the -8 code into system missing, then attach the shared label
foreach amt of varlist summer_teach_amt summer_nonteach_sch_amt summer_nonschool_amt {
	replace `amt' = . if `amt' == -8
}
label values summer_teach_amt summer_nonteach_sch_amt summer_nonschool_amt amt_lbl

* Value labels for the binned base teaching salary during the school year
label define reg_salary_lbl ///
	1 "<$25,001" ///
	2 "$25,001 to $30,000" ///
	3 "$30,001 to $35,000" ///
	4 "$35,001 to $45,000" ///
	5 "$45,001+", replace
label values reg_salary reg_salary_lbl

* Value labels for the binned additional school pay during the school year
label define reg_sch_oth_amt_lbl ///
	1 "<$600" ///
	2 "$600 to $1,499" ///
	3 "$1,500 to $2,999" ///
	4 "$3,000 or more", replace
label values reg_sch_oth_amt reg_sch_oth_amt_lbl

* Value labels for the binned bonus pay during the school year
* NOTE(review): category 1 ends below 500 dollars while category 2 starts at
* 600 dollars, so 500-599 is covered by neither label. One of the two strings
* is presumably a typo -- confirm the bin edges against the SASS codebook
* entry for T0351 before relying on these labels.
label define reg_sch_bonus_amt_lbl ///
	1 "<$500" ///
	2 "$600 to $999" ///
	3 "$1,000 to $1,999" ///
	4 "$2,000 or more", replace
label values reg_sch_bonus_amt reg_sch_bonus_amt_lbl

* Value labels for the binned outside pay during the school year
label define reg_other_amt_lbl ///
	1 "<$1,000" ///
	2 "$1,000-$2,999" ///
	3 "$3,000-$5,999" ///
	4 "$6,000+", replace
label values reg_other_amt reg_other_amt_lbl

* Value labels for the type of outside activity
label define reg_other_type_lbl ///
	1 "Teaching/tutoring" ///
	2 "Non-teaching, but related to teaching" ///
	3 "Other", replace
label values reg_other_type reg_other_type_lbl

* Turn the -8 code into system missing for the school-year follow-up items
foreach item of varlist reg_sch_oth_amt reg_sch_bonus_amt reg_other_amt reg_other_type {
	replace `item' = . if `item' == -8
}

* Flag teachers reporting at least one of the three kinds of summer work;
* equals 1 when any of the three flags is 1 and 0 otherwise
gen byte summer_any = inlist(1, summer_teach, summer_nonteach_sch, summer_nonschool)

* Sex: the source item must be coded 1 or 2; code 2 maps to female = 1
assert inlist(T0356, 1, 2)
gen byte female = T0356 == 2

* Race: the source item must be one of four codes
assert inlist(T0357, 1, 2, 3, 4)
rename T0357 race
label define race ///
	1 "American Indian" ///
	2 "Asian/Pacific Islander" ///
	3 "Black" ///
	4 "White", replace
label values race race

* Ethnicity: the source item must be coded 1 or 2; code 1 maps to hispanic = 1
assert inlist(T0359, 1, 2)
gen byte hispanic = T0359 == 1

* Combine race and ethnicity into one variable. Hispanic takes precedence over
* race; non-Hispanic teachers are split into White, Black, and everyone else.
gen byte wbho = cond(hispanic == 1, 3, cond(race == 4, 1, cond(race == 3, 2, 4)))

label define wbho_lbl ///
	1 "White non-Hispanic" ///
	2 "Black non-Hispanic" ///
	3 "Hispanic or Latinx" ///
	4 "Other non-Hispanic", replace
label values wbho wbho_lbl

* Age arrives pre-binned into four categories
assert inlist(AGE_T, 1, 2, 3, 4)
rename AGE_T age_cat
label define age_cat_lbl ///
	1 "<30 years" ///
	2 "30-39 years" ///
	3 "40-49 years" ///
	4 "50+ years", replace
label values age_cat age_cat_lbl

* Bachelor's degree indicator (source item coded 1 or 2; code 1 maps to 1)
assert inlist(T0070, 1, 2)
gen byte bachelors = T0070 == 1

* Master's degree indicator. The check requires the master's item to hold a
* valid code (1 or 2) exactly for teachers with a bachelor's degree, so
* masters is 0 for everyone without a bachelor's.
assert bachelors == inlist(T0080, 1, 2)
gen byte masters = T0080 == 1

* Main assignment at the school
rename T0051 main_assign
label define main_assign_lbl ///
	 1 "Regular full-time teacher" ///
	 2 "Regular part-time teacher" ///
	 3 "Itinerant teacher" ///
	 4 "Long-term substitute teacher" ///
	 5 "Short-term substitute teacher" ///
	 6 "Student teacher" ///
	 7 "Teacher aide" ///
	 8 "Administrator" ///
	 9 "Librarian" ///
	10 "Other professional staff" ///
	11 "Support staff", replace
label values main_assign main_assign_lbl

* Teaching field (the assign variable keeps its original name)
label define assign_lbl ///
	1 "Pre-K, kindergarten, general elementary" ///
	2 "Math/science" ///
	3 "English/language arts" ///
	4 "Social science" ///
	5 "Special education" ///
	6 "Foreign languages" ///
	7 "Bilingual/ESL" ///
	8 "Vocational/technical education" ///
	9 "All others", replace
label values assign assign_lbl

* Level of the school
label define schlevel_lbl ///
	1 "Elementary" ///
	2 "Secondary" ///
	3 "Combined", replace
label values schlevel schlevel_lbl

* Hours items. hrsall must equal the sum of the three components in every
* observation; once that is verified it is redundant and is dropped.
rename (T0273 T0276 T0277) (hours hours_sch_act hours_sch_prep)
assert hrsall == hours + hours_sch_act + hours_sch_prep
drop hrsall

* Urbanicity of the school location
label define urbanic_lbl ///
	1 "Large/mid-size city" ///
	2 "Urban fringe" ///
	3 "Small town/rural", replace
label values urbanic urbanic_lbl

* Region: convert to numeric with destring so a value label can be attached
destring region, replace
label define region_lbl ///
	1 "Northeast" ///
	2 "Midwest" ///
	3 "South" ///
	4 "West", replace
label values region region_lbl

* Variables that make up the extract, in their final column order. The same
* list drives both keep and order so the two cannot drift apart.
local extract_vars teacher_id school_id tfnlwgt female wbho age_cat bachelors masters ///
	main_assign assign schlevel totexper urbanic region hours* summer* reg*
keep `extract_vars'
order `extract_vars'

* Variable labels: identifiers
label variable teacher_id "Unique teacher ID"
label variable school_id  "School ID"

* Variable labels: teacher characteristics
label variable female     "Female"
label variable wbho       "Race/ethnicity"
label variable age_cat    "Age (binned)"
label variable bachelors  "Has bachelor's degree"
label variable masters    "Has master's degree"

* Variable labels: school characteristics and derived outcome
label variable schlevel   "School level"
label variable region     "Census region"
label variable summer_any "Any summer work"

* Shrink storage types, put observations in teacher_id order, and write the
* cleaned extract (overwriting any earlier copy)
compress
sort teacher_id
save "$basepath/data/derived/sass_teacher.dta", replace

* Close the log file
* NOTE(review): unlaunch is presumably the project-defined counterpart of the
* launch call at the top of this script -- confirm.
unlaunch
